suppressPackageStartupMessages({
library(tidyverse)
library(irlba)
library(DropletUtils)
library(scater)
library(scran)
library(Seurat) ## just 4 loading the object
library(SingleCellExperiment)
library(miloR)
library(patchwork)
library(igraph)
})
There were 24 warnings (use warnings() to see them)
Using data from Ramachandran et al. 2019 (GEO accessiion: GSE136103). The Seurat object was downloaded from https://datashare.is.ed.ac.uk/handle/10283/3433, upon request to the authors.
load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")
## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data),
colData = tissue@meta.data)
dec_liver <- modelGeneVar(liver_sce)
# Visualizing the fit:
fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
ylab="Variance of log-expression")
hvgs <- getTopHVGs(dec_liver, n=3000)
liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)
plotPCA(liver_sce, colour_by="condition", ncomponents=3)
scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1, point_size=0.5)
Error in reducedDimNames(object) : object 'liver_sce' not found
Let’s test for differential abundance between healthy and cirrhotic livers.
liver_meta <- as.tibble(colData(liver_milo)[,c("dataset","condition")])
`as.tibble()` is deprecated as of tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
[90mThis warning is displayed once every 8 hours.[39m
[90mCall `lifecycle::last_warnings()` to see where this warning was generated.[39m
liver_meta <- distinct(liver_meta) %>%
mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
column_to_rownames("dataset")
## Save milo object and results
saveRDS(liver_milo,"/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
write_csv(milo_res, "/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
liver_milo <- readRDS("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
milo_res <- read_csv("/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
Parsed with column specification:
cols(
logFC = [32mcol_double()[39m,
logCPM = [32mcol_double()[39m,
F = [32mcol_double()[39m,
PValue = [32mcol_double()[39m,
FDR = [32mcol_double()[39m,
Nhood = [32mcol_double()[39m,
SpatialFDR = [32mcol_double()[39m,
annotation_indepth = [31mcol_character()[39m,
annotation_indepth_fraction = [32mcol_double()[39m,
annotation_lineage = [31mcol_character()[39m
)
#
# liver_milo_2 <- Milo(as(liver_milo, 'SingleCellExperiment'))
#
# liver_milo_2@graph <- liver_milo@graph
# liver_milo@nhoodDistances <- liver_milo2@nhoodDistances
# liver_milo_2@graph <- liver_milo@graph
Visualize results
milo_res %>%
ggplot(aes(logFC, -log10(SpatialFDR))) +
geom_point(size=0.4) +
geom_hline(yintercept = -log10(0.1))
Check cell type composition of DA neighbourhoods
#' Add annotation of most frequent cell type per nhood to milo results table
add_nhood_coldata_to_res <- function(liver_milo, milo_res, coldata_col){
nhood_counts <- sapply(seq_along(nhoods(liver_milo)), function(x) table(colData(liver_milo)[as.vector(nhoods(liver_milo)[[x]]), coldata_col]))
nhood_counts <- t(nhood_counts)
rownames(nhood_counts) <- seq_along(nhoods(liver_milo))
max_val <- apply(nhood_counts, 1, function(x) colnames(nhood_counts)[which.max(x)])
max_frac <- apply(nhood_counts, 1, function(x) max(x)/sum(x))
milo_res[coldata_col] <- max_val
milo_res[paste0(coldata_col, "_fraction")] <- max_frac
return(milo_res)
}
milo_res <- add_nhood_coldata_to_res(liver_milo, milo_res, "annotation_indepth")
milo_res$annotation_lineage.x <- NULL
milo_res$annotation_lineage.y <- NULL
milo_res$annotation_lineage <- NULL
anno_df <- data.frame(liver_milo@colData) %>%
distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")
rownames(milo_res) <- 1:nrow(milo_res)
Setting row names on a tibble is deprecated.
This shows that I can recover all the clusters where DA was detected in the original paper (see all the barplots for each lineage) and more! All in a single analysis, and without knowing where the subclusters are. Let’s bear in mind that positive logFC –> more cirrhotic, negative logFC —> more healthy
paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
"Endothelia (6)", "Endothelia (7)",
"Mes (3)",
"Tcells (2)",
"Myofibroblasts"
),
healthy=c("MPs (7)",
"Endothelia (1)",
"Tcells (1)", "Tcells (3)","Tcells (1)",
"ILCs (1)"
)
)
expDA_df <- bind_rows(
data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
)
pl1 <- milo_res %>%
left_join(expDA_df) %>%
mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
arrange(annotation_lineage) %>%
mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
scale_color_gradient2() +
guides(color="none") +
xlab("annotation") + ylab("Log Fold Change") +
ggbeeswarm::geom_quasirandom(alpha=1) +
coord_flip() +
facet_grid(annotation_lineage~., scales="free", space="free") +
theme_bw(base_size=16) +
theme(strip.text.y = element_text(angle=0),
axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
)
Joining, by = "annotation_indepth"
pl2 <- milo_res %>%
left_join(expDA_df) %>%
# dplyr::filter(!is.na(pred_DA)) %>%
group_by(annotation_indepth) %>%
summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
mutate(end=ifelse(pred_DA=="healthy", 0, 1),
start=ifelse(pred_DA=="healthy", 1, 0)) %>%
ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
coord_flip() +
xlab("annotation") +
facet_grid(annotation_lineage~.,
# annotation_lineage~"Ramachandran et al.\nDA predictions",
scales="free", space="free") +
# guides(color="none") +
scale_color_brewer(palette="Set1", direction = -1,
labels=c("enriched in cirrhotic", "enriched in healthy"),
na.translate = F,
name="Ramachandran et al.\nDA predictions") +
guides(color=guide_legend(ncol = 1)) +
theme_bw(base_size=16) +
ylim(-0.1,1.1) +
theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "bottom")
Joining, by = "annotation_indepth"
`summarise()` ungrouping output (override with `.groups` argument)
(pl2 + pl1 +
plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0)) +
ggsave("~/milo_output/liver_DAcomparison.pdf", width=8, height = 13)
liver_milo <- buildNhoodGraph(liver_milo)
[1] "Calculating nhood adjacency...."
I will try this on the endothelial lineage. I want to merge overlapping nhoods with significant DA and the same direction of logFC, and then test for differential expression between cells in up and down nhoods (I guess you could also do up or down VS all the rest). This allows us to perform a comparison without further clustering.
subset.nhoods[1:(length(subset.nhoods)-1)]
[1] 11 20 21 22 26 46 49 50 83 104 112 118 119 128 132 149 151 156 162 193 226 233 246 272 295 296 309 315 325 330
[31] 360 364 365 371 377 382 385 386 388 391 395 401 420 422 438 443 449 479 480 481 485 499 500 505 527 531 536 537 538 551
[61] 553 559 560 569 576 594 605 613 614 620 621 645 660 666 682 690 702 705 711 716 725 726 736 738 751 752 768 772 773 781
[91] 782 799 806 810 812 821 837 849 852 865 870 923 926 928 970 986 1002 1006 1010 1013 1051 1053 1054 1061 1073 1086 1088 1093 1096 1097
[121] 1108 1118 1121 1126 1137 1147 1148 1152 1153 1171 1182 1188 1190 1196 1214 1217 1224 1232 1236 1238 1241 1251 1256 1258 1262 1263 1266 1268 1269 1275
[151] 1277 1285 1289 1306 1312 1313 1339 1343 1348 1352 1367 1379 1380 1391 1419 1422 1424 1427 1433 1436 1443 1450 1454 1465 1481 1497 1502 1524 1528 1538
[181] 1540 1550 1568 1572 1582 1597 1608 1609 1619 1623 1627 1629 1640 1652 1653 1654 1659 1661 1663 1669 1673 1675 1677 1679 1680 1685 1690 1700 1702 1709
[211] 1714 1718 1726 1730 1732 1737 1739 1749 1765 1766 1771 1773 1787 1804 1805 1817 1828 1860 1864 1869 1872 1878 1887 1905 1910 1922 1926 1927 1930 1936
[241] 1946 1959 1972 1984 1989 2004 2020 2026 2041 2045 2053 2072 2079 2116 2118 2119 2122 2126 2130 2136 2140 2141 2145 2152 2155 2157 2164 2168 2177 2185
[271] 2190 2193 2199 2207 2209 2211 2215 2219 2227 2229 2233 2258 2260 2278 2291 2292 2334 2351 2355 2373 2377 2394 2396 2405 2423 2424 2429 2449 2450 2455
[301] 2462 2463 2473 2489 2491 2495 2498 2501 2504 2507 2508 2517 2527 2534 2552 2557 2559 2563 2567 2584 2585 2587 2589 2593 2602 2606 2609 2615 2620 2622
[331] 2626 2632 2639 2646 2653 2657 2664 2666 2672 2673 2675 2677 2684 2685 2691 2692 2695
endo_milo <- buildNhoodGraph(endo_milo)
[1] "Calculating nhood adjacency...."
'CD59' %in% endo_hvgs
[1] FALSE
nhood_markers <- findNhoodMarkers(liver_milo, milo_res, overlap=1, assay="logcounts", return.groups = TRUE,
subset.row = endo_hvgs,
subset.nhoods = milo_res$annotation_lineage=="Endothelia")
Found 1404 DA neighbourhoods at FDR 10%
Nhoods aggregated into 2 groups
marker_genes <-
nhood_markers$dge %>%
pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
dplyr::filter(smp==name) %>%
dplyr::filter(smp==1) %>%
mutate(logFC_dir=ifelse(logFC < 0, "down", 'up')) %>%
group_by(logFC_dir) %>%
top_n(n=50, -log10(value)) %>%
ungroup() %>%
# dplyr::filter(value < 0.01 & abs(logFC) > 0.02) %>%
pull(GeneID) %>%
unique()
nhood_markers$dge %>%
pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
dplyr::filter(smp==name) %>%
mutate(color=ifelse(value<0.05, 1, 0),
label=ifelse(GeneID %in% marker_genes, GeneID,NA)) %>%
ggplot(aes(logFC, -log10(value), color=color)) +
geom_point(size=0.1) +
facet_wrap(name~., scales="free") +
geom_hline(yintercept = -log10(0.01)) +
ggrepel::geom_text_repel(aes(label=label))
marker_genes <- nhood_markers$dge %>%
dplyr::filter(adj.P.Val_1 < 0.01) %>%
pull(GeneID)
Customize plotting for paper figure
x <- liver_milo
da.res <- milo_res
cluster_features = TRUE
alpha = 0.05
scale_to_1 = TRUE
subset.nhoods = milo_res$annotation_lineage=="Endothelia"
features <- marker_genes
expr_mat <- nhoodExpression(x)[features,]
colnames(expr_mat) <- 1:length(nhoods(x))
## Get nhood expression matrix
if (!is.null(subset.nhoods)) {
expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
}
if (!isFALSE(scale_to_1)) {
expr_mat <- t(apply(expr_mat, 1, function(x) (x - min(x))/(max(x)- min(x))))
}
rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame
pl_df <- data.frame(t(expr_mat)) %>%
rownames_to_column("Nhood") %>%
mutate(Nhood=as.double(Nhood)) %>%
left_join(da.res, by="Nhood") %>%
mutate(logFC_rank=percent_rank(logFC))
## Top plot: nhoods ranked by DA log FC
pl_top <- pl_df %>%
mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
ggplot(aes(logFC_rank, logFC)) +
geom_hline(yintercept = 0, linetype=2) +
geom_point(size=0.2, color="grey") +
geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
theme_bw(base_size=16) +
ylab("DA logFC") +
scale_color_manual(values="red", name="") +
scale_x_continuous(expand = c(0.01, 0)) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
## Bottom plot: gene expression heatmap
if (isTRUE(cluster_features)) {
row.order <- hclust(dist(expr_mat))$order # clustering
ordered_features <- rownames(expr_mat)[row.order]
} else {
ordered_features <- rownames(expr_mat)
}
Let’s check the genes identified as markers for the disease subtypes. Are they significantly differentially expressed between DA neighbourhoods? Yes they are!
disease_endo_markers <- c("ACKR1", 'CD34',"VWA1")
data.frame(markers$negLogFC_2) %>%
filter(FDR < 0.05) %>%
.[disease_endo_markers,]
data.frame(markers$negLogFC_1) %>%
filter(FDR < 0.05) %>%
.[disease_endo_markers,]
Visualize some of the markers that differentiate DA neighbourhoods, by plotting the percent of cells expressing each gene in a nhood.
## Define plotting functions
.calculate_nhood_perc_expression <- function(milo, nhoods, gene){
gene_cnts <- counts(milo)[gene,]
perc_expr <- sapply(nhoods(milo)[nhoods], function(x) sum(gene_cnts[x]>0)/length(x))
perc_expr <- setNames(perc_expr, nhoods)
return(perc_expr)
}
.plot_nhood_expression <- function(milo, nhoods, features){
perc_expr_mat <- sapply(features,
function(x) .calculate_nhood_perc_expression(milo, nhoods, x))
pl_df <- data.frame(perc_expr_mat) %>%
rownames_to_column("Nhood") %>%
mutate(Nhood=as.double(Nhood)) %>%
left_join(milo_res) %>%
mutate(logFC_rank=rank(logFC))
pl_top <- pl_df %>%
mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>%
ggplot(aes(logFC_rank, logFC)) +
geom_hline(yintercept = 0, linetype=2) +
geom_point(size=0.2) +
geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) +
theme_bw() +
scale_color_manual(values="red", name="") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
pl_bottom <- pl_df %>%
pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>%
mutate(feature=factor(feature, levels=features)) %>%
ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +
geom_tile() +
scale_fill_viridis_c(option="magma") +
ggbio::theme_clear()
(pl_top / pl_bottom) + plot_layout(heights = c(1,2))
}
endo_nhoods <- endo_res %>% pull(Nhood)
## Select genes and sort by AUC
feats_neg2 <-
data.frame(markers$negLogFC_2) %>%
top_n(50, - log10(FDR)) %>%
arrange(AUC.posLogFC_1) %>%
rownames()
.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)
As described in the paper, we have that genes associated with extracellular matrix organization (e.g. VIM, ) are over expressed
## Select genes and sort by AUC
feats_neg1 <-
data.frame(markers$negLogFC_1) %>%
top_n(50, - log10(FDR)) %>%
arrange(AUC.posLogFC_1) %>%
rownames()
.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)
feats_neg1vsNeg2 <-
data.frame(markers$negLogFC_1) %>%
top_n(50, - log10(FDR)) %>%
arrange(AUC.negLogFC_2) %>%
rownames()
.plot_nhood_expression(liver_milo, endo_nhoods, features=feats_neg1vsNeg2)